Great! At this point, I’ve run a Matlab script on the children’s raw data to collect XY gaze data for each video/trial they viewed. We start with 68 (now 79 after Dec/Jan batch of 11 kids) children. Here they are.
# Libraries
library(tidyverse)
library(feather)
library(lme4)
library(grid)
library(png)
library(lmerTest)
#library(cowplot)
# Import data (and fix one participant name, and fix Owen as EnglishExposed)
data <- read_feather("../Child Data/childxydata.feather") %>%
mutate(x = na_if(x, "NaN"),
y = na_if(y, "NaN"))
# Get ages
ages <- read_csv("childrenages.csv")
data <- data %>% left_join(ages, by = "participant")
data %>% select(participant,language,age) %>% distinct() %>% arrange(age) # print data tableI excluded all kids that were not included in the AOI analysis. Here is a list of all of ’em!
# Load included babies and children lists
included_babies <- read_feather("cleanedbabyeyedata.feather") %>%
select(participant) %>%
distinct()
included_children <- read_feather("cleanedchildeyedata.feather") %>%
select(participant) %>%
distinct()
included <- rbind(included_babies, included_children)
# Use antijoin to see excluded kids
excluded <- anti_join(data, included, by = "participant") %>%
select(participant, language, age) %>%
distinct()
# Print table
excluded# Remove excluded kids from main dataset
data <- semi_join(data, included, by = "participant")Let’s see a histogram of ages! After this I’ll add “baby” and “child” variables so all < 2.0 are identified as babies.
# Histogram of ages
data %>% select(participant,language,age) %>%
distinct() %>%
ggplot(aes(x = age)) + geom_histogram(fill = "royalblue", binwidth = 0.25) + ggtitle("Ages in Full Dataset")# Add baby/child agegroup column
data <- data %>%
mutate(agegroup = age < 2.0)
data$agegroup <- as.factor(data$agegroup)
data$agegroup <- fct_recode(data$agegroup, baby = "TRUE", child = "FALSE")And our participant table.
participants_b <- data %>%
filter(agegroup=="baby") %>%
select(participant, gender, language, age) %>%
distinct()
participants_b_n <- participants_b %>%
count(gender, language) %>%
spread(gender, n)
participants_b_age <- participants_b %>%
group_by(language) %>%
summarise(age_m = round(mean(age), 1),
age_sd = round(sd(age), 1),
age_min = range(age)[1],
age_max = range(age)[2]) %>%
mutate(age_range = paste(age_min, age_max, sep = " - ")) %>%
select(-age_min, -age_max) %>%
mutate(age_mean = paste(age_m, age_sd, sep = "±")) %>%
select(-age_m, -age_sd) %>%
select(language, age_mean, age_range)
participants_table_b <- left_join(participants_b_n, participants_b_age, by = "language") %>%
add_column(agegroup = "baby")
participants_c <- data %>%
filter(agegroup=="child") %>%
select(participant, gender, language, age) %>%
distinct()
participants_c_n <- participants_c %>%
count(gender, language) %>%
spread(gender, n)
participants_c_age <- participants_c %>%
group_by(language) %>%
summarise(age_m = round(mean(age), 1),
age_sd = round(sd(age), 1),
age_min = range(age)[1],
age_max = range(age)[2]) %>%
mutate(age_range = paste(age_min, age_max, sep = " - ")) %>%
select(-age_min, -age_max) %>%
mutate(age_mean = paste(age_m, age_sd, sep = "±")) %>%
select(-age_m, -age_sd) %>%
select(language, age_mean, age_range)
participants_table_c <- left_join(participants_c_n, participants_c_age, by = "language") %>%
add_column(agegroup = "child")
rbind(participants_table_b, participants_table_c) %>%
select(language, agegroup, Female, Male, age_mean, age_range)Great. Let’s save this as `cleanedchildxydata.csv’.
# Pull apart condition columns
data <- data %>%
separate(condition, into = c("story", "clipnum", "direction", "media"), sep = "_") %>%
unite(story, clipnum, col = "story", sep = "_") %>%
select(-media)
# A bit more cleaning up
data <- data %>%
mutate(direction = case_when(
direction == "FW" ~ "forward",
direction == "ER" ~ "reversed"
)) %>%
mutate(language = case_when(
language == "SignLanguageExposed" ~ "SE",
language == "EnglishExposed" ~ "NSE"
)) %>%
mutate(group = as.factor(group),
gender = as.factor(gender),
language = as.factor(language),
story = as.factor(story),
direction = as.factor(direction))
# Save as csv and feather (feather preserves column types for R)
write_csv(data,"../Child Data/cleanedchildxydata.csv")
write_feather(data,"../Child Data/cleanedchildxydata.feather")Do we need to do any other cleanup? I don’t think so.
First, let’s trim each participant’s data, getting rid of the first 60 samples (0.5 secs). Then we’ll get the the mean x and y coordinate for each story for each participant.
# Just to load data again
data <- read_feather("../Child Data/cleanedchildxydata.feather")
data <- data %>%
group_by(participant,trial) %>%
slice(60:n())
data_central_tendencies <- data %>%
group_by(language, agegroup, participant, trial) %>%
summarise(mean_x = mean(x,na.rm=TRUE),
mean_y = mean(y,na.rm=TRUE),
median_x = median(x, na.rm=TRUE),
median_y = median(y, na.rm=TRUE),
diff_x = mean_x - median_x,
diff_y = mean_y - median_y)
means <- data_central_tendencies %>%
group_by(language, agegroup, participant) %>%
summarise(mean_x = mean(mean_x, na.rm = TRUE),
mean_y = mean(mean_y, na.rm = TRUE)) %>%
group_by(language, agegroup) %>%
summarise(sd_x = sd(mean_x),
sd_y = sd(mean_y),
n = n(),
mean_x = mean(mean_x),
mean_y = mean(mean_y)*-1,
se_x = sd_x/sqrt(n),
se_y = sd_y/sqrt(n))
meansmeans_error <- means %>%
select(-n, -sd_x, -sd_y) %>%
gather(measure, value, mean_x:se_y) %>%
separate(measure, into = c("measure","axis")) %>%
spread(measure, value)
means_error %>%
filter(axis == "x") %>%
ggplot(aes(x = agegroup, y = mean, color = language, group = language)) +
geom_point(position = position_dodge(width = 0.4)) +
geom_errorbar(aes(ymin = mean-se, ymax = mean+se),
position = position_dodge(width = 0.4), width = 0.25, size = 0.5) +
scale_y_continuous(limits = c(0,1080)) +
coord_flip() +
labs(y = "mean along x axis", title = "X-Axis Means")means_error %>%
filter(axis == "y") %>%
ggplot(aes(x = agegroup, y = mean, color = language, group = language)) +
geom_point(position = position_dodge(width = 0.4)) +
geom_errorbar(aes(ymin = mean-se, ymax = mean+se),
position = position_dodge(width = 0.4), width = 0.25, size = 0.5) +
scale_y_continuous(limits = c(-720,0)) +
labs(y = "mean along y axis", title = "Y-Axis Means")But is the y-value distribution unimodal, bimodal, normal, what? Do the means represent the only peak? Let’s get histograms.
ggplot(data, aes(x = y)) + geom_histogram(binwidth = 10) + facet_grid(agegroup ~ language) +
ggtitle("Histograms of all y-values in all stories")Maybe the mixture of stories and directions throws off the histograms. Let’s break it down by “mark” which is an unique number I assigned to each story/direction. Below is a “guide” for each mark.
ggplot(data, aes(x = y)) + geom_histogram(binwidth = 10) + facet_grid(mark ~ agegroup) +
ggtitle("Histograms of all y-values by each story/mark")data %>% ungroup() %>% select(mark, story, direction) %>% distinct() %>% arrange(mark)Still seems mostly unimodal (that means one peak, right?).
But is the data skewed? I’ve been wondering if we should be using MEDIANS because there can be some extreme x and y values. But Rain said there’s been criticism of using medians and that means are better overall. Let’s have a look.
The first chart shows the difference between the mean and the median for each participant and trial. Positive means the mean is bigger than the median, negative means the median is bigger. It shows there is some skew for the y-axis…but the vast majority of differences is less than 50 px so maybe it’s okay.
The second chart shows the means and medians themselves. And the spread is pretty similar between mean and median so maybe using means is fine.
data_central_tendencies %>%
gather(measure, value, diff_x:diff_y) %>%
ggplot(aes(x = value)) + geom_histogram() + facet_grid(. ~ measure)data_central_tendencies %>%
gather(measure, value, mean_x:median_y) %>%
separate(measure, into = c("measure","axis")) %>%
ggplot(aes(x = value)) + geom_histogram() + facet_grid(measure ~ axis)Let’s run a LMM on the means. First, x means for babies.
means <- data %>%
group_by(language, agegroup, participant, age, story, direction, trial, repetition) %>%
summarise(x = mean(x, na.rm = TRUE),
y = mean(y, na.rm = TRUE))
means$repetition = as.factor(means$repetition)
means$trial = as.factor(means$trial)
lmm_baby_mean_x <- lmer(x ~ language * direction + age +
(1|story) + (1|participant) + (1|repetition) + (1|trial),
data = filter(means, agegroup == "baby"))
summary(lmm_baby_mean_x)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: x ~ language * direction + age + (1 | story) + (1 | participant) +
(1 | repetition) + (1 | trial)
Data: filter(means, agegroup == "baby")
REML criterion at convergence: 4091.1
Scaled residuals:
Min 1Q Median 3Q Max
-4.6447 -0.4455 0.0216 0.4952 3.5964
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 6.914e+02 2.629e+01
trial (Intercept) 1.483e+01 3.852e+00
story (Intercept) 6.060e+01 7.785e+00
repetition (Intercept) 9.096e-14 3.016e-07
Residual 7.018e+02 2.649e+01
Number of obs: 429, groups: participant, 28; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 529.988 13.940 28.056 38.019 <2e-16 ***
languageSE -1.961 11.383 27.812 -0.172 0.864
directionreversed 2.125 3.712 94.056 0.572 0.568
age 4.344 16.991 25.024 0.256 0.800
languageSE:directionreversed -1.505 5.174 380.678 -0.291 0.771
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lnggSE drctnr age
languageSE -0.018
dirctnrvrsd -0.127 0.133
age -0.838 -0.346 -0.002
lnggSE:drct 0.078 -0.225 -0.607 0.000
Y means for babies
lmm_baby_mean_y <- lmer(y ~ language * direction + age +
(1|story) + (1|participant) + (1|repetition) + (1|trial),
data = filter(means, agegroup == "baby"))
summary(lmm_baby_mean_y)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: y ~ language * direction + age + (1 | story) + (1 | participant) +
(1 | repetition) + (1 | trial)
Data: filter(means, agegroup == "baby")
REML criterion at convergence: 4472.2
Scaled residuals:
Min 1Q Median 3Q Max
-4.7629 -0.5152 -0.0865 0.3686 7.5235
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 987.93 31.431
trial (Intercept) 0.00 0.000
story (Intercept) 156.15 12.496
repetition (Intercept) 54.43 7.378
Residual 1793.08 42.345
Number of obs: 429, groups: participant, 28; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 359.035 18.093 26.479 19.844 <2e-16 ***
languageSE -11.917 14.183 29.658 -0.840 0.408
directionreversed -5.524 5.497 394.338 -1.005 0.315
age -27.485 20.794 24.853 -1.322 0.198
languageSE:directionreversed -1.133 8.269 391.444 -0.137 0.891
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lnggSE drctnr age
languageSE -0.028
dirctnrvrsd -0.144 0.185
age -0.790 -0.339 -0.003
lnggSE:drct 0.096 -0.289 -0.656 0.000
X means for children
lmm_child_mean_x <- lmer(x ~ language * direction + age +
(1|story) + (1|participant) + (1|repetition) + (1|trial),
data = filter(means, agegroup == "child"))
summary(lmm_child_mean_x)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: x ~ language * direction + age + (1 | story) + (1 | participant) +
(1 | repetition) + (1 | trial)
Data: filter(means, agegroup == "child")
REML criterion at convergence: 5205.9
Scaled residuals:
Min 1Q Median 3Q Max
-6.5624 -0.4214 -0.0220 0.4497 5.6293
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 2.444e+02 15.63444
trial (Intercept) 8.803e-04 0.02967
story (Intercept) 2.706e+01 5.20181
repetition (Intercept) 1.265e+01 3.55607
Residual 6.995e+02 26.44827
Number of obs: 549, groups: participant, 36; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 517.4405 9.7123 34.0618 53.277 <2e-16 ***
languageSE 5.8790 6.2237 44.6552 0.945 0.350
directionreversed -0.3100 3.5974 134.3702 -0.086 0.931
age 1.4262 1.5807 33.1254 0.902 0.373
languageSE:directionreversed 0.5937 4.6868 500.1141 0.127 0.899
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lnggSE drctnr age
languageSE -0.332
dirctnrvrsd -0.185 0.294
age -0.811 -0.052 -0.002
lnggSE:drct 0.141 -0.375 -0.772 0.004
Y means for children
lmm_child_mean_y <- lmer(y ~ language * direction + age +
(1|story) + (1|participant) + (1|repetition) + (1|trial),
data = filter(means, agegroup == "child"))
summary(lmm_child_mean_y)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: y ~ language * direction + age + (1 | story) + (1 | participant) +
(1 | repetition) + (1 | trial)
Data: filter(means, agegroup == "child")
REML criterion at convergence: 5906.3
Scaled residuals:
Min 1Q Median 3Q Max
-4.9767 -0.4291 -0.0869 0.3004 5.7854
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 873.4 29.55
trial (Intercept) 0.0 0.00
story (Intercept) 208.7 14.45
repetition (Intercept) 141.2 11.88
Residual 2510.9 50.11
Number of obs: 549, groups: participant, 36; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 318.319 19.986 19.051 15.927 1.82e-12 ***
languageSE -13.635 11.779 44.539 -1.158 0.253
directionreversed 6.127 6.856 510.618 0.894 0.372
age -2.228 2.989 32.917 -0.745 0.461
languageSE:directionreversed 1.717 8.917 510.201 0.193 0.847
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lnggSE drctnr age
languageSE -0.306
dirctnrvrsd -0.172 0.296
age -0.745 -0.052 -0.002
lnggSE:drct 0.131 -0.377 -0.774 0.004
Let’s try it with both kids and babies.
lmm_all_mean_x <- lmer(x ~ language * direction * agegroup +
(1|story) + (1|participant) + (1|repetition) + (1|trial),
data = means)
summary(lmm_all_mean_x)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: x ~ language * direction * agegroup + (1 | story) + (1 | participant) +
(1 | repetition) + (1 | trial)
Data: means
REML criterion at convergence: 9313.6
Scaled residuals:
Min 1Q Median 3Q Max
-6.4272 -0.4439 -0.0096 0.4477 5.5399
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 426.340 20.648
trial (Intercept) 10.925 3.305
story (Intercept) 33.390 5.778
repetition (Intercept) 6.443 2.538
Residual 704.356 26.540
Number of obs: 978, groups: participant, 64; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 524.0869 6.5677 46.3558 79.798 <2e-16 ***
languageSE 6.7436 7.7079 72.8289 0.875 0.385
directionreversed 0.5448 3.7561 338.0867 0.145 0.885
agegroupbaby 9.2400 8.1821 72.3998 1.129 0.263
languageSE:directionreversed -0.6805 4.6674 899.1420 -0.146 0.884
languageSE:agegroupbaby -7.6835 11.6152 72.5026 -0.661 0.510
directionreversed:agegroupbaby 0.9396 4.9073 895.3416 0.191 0.848
languageSE:directionreversed:agegroupbaby -0.8171 6.9707 896.1100 -0.117 0.907
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lnggSE drctnr aggrpb lnggSE:d lnggSE:g drctn:
languageSE -0.686
dirctnrvrsd -0.288 0.223
agegroupbby -0.645 0.550 0.207
lnggSE:drct 0.211 -0.301 -0.730 -0.167
lnggSE:ggrp 0.455 -0.664 -0.148 -0.705 0.200
drctnrvrsd: 0.197 -0.168 -0.681 -0.297 0.549 0.210
lnggSE:drc: -0.141 0.202 0.488 0.211 -0.669 -0.298 -0.708
lmm_all_mean_y <- lmer(y ~ language * direction * agegroup +
(1|story) + (1|participant) + (1|repetition) + (1|trial),
data = means)
summary(lmm_all_mean_y)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: y ~ language * direction * agegroup + (1 | story) + (1 | participant) +
(1 | repetition) + (1 | trial)
Data: means
REML criterion at convergence: 10397.5
Scaled residuals:
Min 1Q Median 3Q Max
-5.3388 -0.4505 -0.1106 0.3424 6.7352
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 9.301e+02 3.050e+01
trial (Intercept) 1.996e-13 4.468e-07
story (Intercept) 1.801e+02 1.342e+01
repetition (Intercept) 1.020e+02 1.010e+01
Residual 2.202e+03 4.692e+01
Number of obs: 978, groups: participant, 64; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 307.645 12.460 8.003 24.692 7.7e-09 ***
languageSE -14.666 11.821 77.430 -1.241 0.2185
directionreversed 5.267 6.320 909.255 0.833 0.4049
agegroupbaby 32.579 12.541 76.803 2.598 0.0112 *
languageSE:directionreversed 2.604 8.259 908.011 0.315 0.7526
languageSE:agegroupbaby -3.402 17.805 76.958 -0.191 0.8490
directionreversed:agegroupbaby -10.804 8.677 905.014 -1.245 0.2134
languageSE:directionreversed:agegroupbaby -4.238 12.328 905.291 -0.344 0.7311
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lnggSE drctnr aggrpb lnggSE:d lnggSE:g drctn:
languageSE -0.555
dirctnrvrsd -0.256 0.271
agegroupbby -0.522 0.551 0.251
lnggSE:drct 0.197 -0.348 -0.769 -0.193
lnggSE:ggrp 0.369 -0.664 -0.180 -0.705 0.231
drctnrvrsd: 0.183 -0.194 -0.715 -0.343 0.549 0.243
lnggSE:drc: -0.132 0.233 0.514 0.243 -0.669 -0.344 -0.708
No difference in the mean looking position for x or y in children or babies! But if we put children and babies in the same dataset we get a significant main effect of children vs. babies. Okay.
And I can get x or y plots of one participant across 8 stories. Let’s do Emmet. We’ll set the x and y limits to the whole width of the Tobii monitor (1600x1200…or is it now 1080x720). But because Tobii considers (0,0) to be the upper left corner (and not the bottom left corner), we also need to flip the y axis.
emmet <- filter(data,participant=="emmet_12_10_12_CODA") %>% mutate(y = y*-1)
ggplot(emmet,aes(x=x,y=y,color=story)) + geom_point(size=0.1) + geom_path() + facet_grid(repetition ~ story) + guides(color="none") + scale_x_continuous(limit=c(0,1080)) + scale_y_continuous(limit=c(-720,0))Cool, yeah?
To measure viewing space, we can use standard deviation or IQR. Generally, if we’re using means, we should use standard deviations. If we’re using medians, we should use IQR. That’s my thinking, anyway.
We’ll try SDs first. Let’s try the first SD, which is the middle 68% of the data.
sd <- data %>%
group_by(participant, trial) %>%
summarise(mean_x = mean(x, na.rm = TRUE),
mean_y = mean(y, na.rm = TRUE),
sd_x = sd(x, na.rm = TRUE),
sd_y = sd(y, na.rm = TRUE)) %>%
ungroup()
head(sd,10)# join participant info back
participantinfo <- data %>%
select(participant, trial, age, group, agegroup, gender, language, story, direction, mark, repetition) %>%
distinct()
sd <- left_join(sd, participantinfo, by = c("participant","trial"))And check out the histograms. I truncated the y-axis at 50 counts to better see outliers.
sd %>%
gather(axis,sd,sd_x:sd_y) %>%
ggplot(aes(x=sd,fill=axis)) + geom_histogram() + facet_grid(axis~.) +
coord_cartesian(ylim = c(0,50))So there are some really high outliers where the SD is 150 or 200 pixels in one direction (so a spread of as high as 400 pixels, which is a lot! I want to see those cases to see if they should be taken out or if we don’t need to worry about them.
It may be useful to think about getting rid of outliers. We should keep this in mind…
xoutliers <- sd %>%
arrange(desc(sd_x)) %>%
slice(1:20)
youtliers <- sd %>%
arrange(desc(sd_y)) %>%
slice(1:20)
xoutliersyoutliersFirst, does reversal and language experience have an effect on the SD? Babies, x-axis first.
sd$trial <- as.factor(sd$trial)
sd$repetition <- as.factor(sd$repetition)
sd_x_baby <- lmer(sd_x ~ direction * language + age + (1|participant) + (1|story) + (1|repetition) + (1|trial), data = filter(sd, agegroup == "baby"))
summary(sd_x_baby)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: sd_x ~ direction * language + age + (1 | participant) + (1 |
story) + (1 | repetition) + (1 | trial)
Data: filter(sd, agegroup == "baby")
REML criterion at convergence: 4031.8
Scaled residuals:
Min 1Q Median 3Q Max
-2.1433 -0.5660 -0.1759 0.4209 6.1048
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 128.49 11.335
trial (Intercept) 0.00 0.000
story (Intercept) 23.15 4.811
repetition (Intercept) 0.00 0.000
Residual 679.40 26.065
Number of obs: 429, groups: participant, 28; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 58.666 6.950 30.929 8.441 1.58e-09 ***
directionreversed -1.729 3.374 398.351 -0.512 0.6086
languageSE -3.013 5.932 37.493 -0.508 0.6145
age -14.763 8.214 25.074 -1.797 0.0844 .
directionreversed:languageSE 1.570 5.087 393.298 0.309 0.7579
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) drctnr lnggSE age
dirctnrvrsd -0.231
languageSE -0.066 0.273
age -0.813 -0.004 -0.320
drctnrvr:SE 0.154 -0.656 -0.425 0.000
That’s fine, we’re not exactly predicting changes along the x-axis. The y-axis is what we are really interested in! :)
sd_y_baby <- lmer(sd_y ~ direction * language + age + (1|participant) + (1|story) + (1|repetition) + (1|trial), data = filter(sd, agegroup == "baby"))
summary(sd_y_baby)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: sd_y ~ direction * language + age + (1 | participant) + (1 |
story) + (1 | repetition) + (1 | trial)
Data: filter(sd, agegroup == "baby")
REML criterion at convergence: 4152.3
Scaled residuals:
Min 1Q Median 3Q Max
-2.1879 -0.6675 -0.1022 0.5334 5.5186
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 171.10 13.081
trial (Intercept) 0.00 0.000
story (Intercept) 24.35 4.934
repetition (Intercept) 0.00 0.000
Residual 904.93 30.082
Number of obs: 429, groups: participant, 28; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 74.335 7.970 30.174 9.327 2.14e-10 ***
directionreversed 1.496 3.892 398.587 0.384 0.701
languageSE -11.333 6.846 37.200 -1.656 0.106
age -13.111 9.479 24.869 -1.383 0.179
directionreversed:languageSE 2.984 5.871 393.005 0.508 0.612
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) drctnr lnggSE age
dirctnrvrsd -0.232
languageSE -0.067 0.273
age -0.818 -0.004 -0.320
drctnrvr:SE 0.155 -0.656 -0.425 0.000
Now children, x-axis
sd_x_child <- lmer(sd_x ~ direction * language + age + (1|participant) + (1|story) + (1|repetition) + (1|trial), data = filter(sd, agegroup == "child"))
summary(sd_x_child)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: sd_x ~ direction * language + age + (1 | participant) + (1 |
story) + (1 | repetition) + (1 | trial)
Data: filter(sd, agegroup == "child")
REML criterion at convergence: 5010.9
Scaled residuals:
Min 1Q Median 3Q Max
-1.4383 -0.5518 -0.2357 0.2077 7.2716
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 26.257 5.124
trial (Intercept) 9.714 3.117
story (Intercept) 6.717 2.592
repetition (Intercept) 0.000 0.000
Residual 537.606 23.186
Number of obs: 548, groups: participant, 36; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 40.6900 4.6287 44.1961 8.791 2.93e-11 ***
directionreversed 4.7157 3.3232 161.4753 1.419 0.1578
languageSE -2.3699 3.3429 78.2394 -0.709 0.4805
age -1.7091 0.7285 31.9776 -2.346 0.0253 *
directionreversed:languageSE -0.1704 4.0798 487.9750 -0.042 0.9667
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) drctnr lnggSE age
dirctnrvrsd -0.358
languageSE -0.383 0.443
age -0.782 -0.003 -0.052
drctnrvr:SE 0.255 -0.724 -0.607 0.007
And children, y-axis
sd_y_child <- lmer(sd_y ~ direction * language + age + (1|participant) + (1|story) + (1|repetition) + (1|trial), data = filter(sd, agegroup == "child"))
summary(sd_y_child)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: sd_y ~ direction * language + age + (1 | participant) + (1 |
story) + (1 | repetition) + (1 | trial)
Data: filter(sd, agegroup == "child")
REML criterion at convergence: 5241.5
Scaled residuals:
Min 1Q Median 3Q Max
-2.0820 -0.6600 -0.1660 0.4132 4.4915
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 160.2457 12.6588
trial (Intercept) 0.1567 0.3958
story (Intercept) 16.1688 4.0210
repetition (Intercept) 0.0000 0.0000
Residual 786.8488 28.0508
Number of obs: 548, groups: participant, 36; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 66.904 8.063 38.681 8.297 4.06e-10 ***
directionreversed 5.329 3.799 149.626 1.403 0.1627
languageSE -11.871 5.510 51.470 -2.154 0.0359 *
age -3.007 1.351 33.222 -2.226 0.0329 *
directionreversed:languageSE -2.624 4.951 497.195 -0.530 0.5964
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) drctnr lnggSE age
dirctnrvrsd -0.234
languageSE -0.356 0.348
age -0.834 -0.003 -0.052
drctnrvr:SE 0.178 -0.770 -0.447 0.005
And now all babies/children, x axis
sd_x_all <- lmer(sd_x ~ direction * language * agegroup + (1|participant) + (1|story) + (1|repetition) + (1|trial), data = sd)
summary(sd_x_all)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: sd_x ~ direction * language * agegroup + (1 | participant) +
(1 | story) + (1 | repetition) + (1 | trial)
Data: sd
REML criterion at convergence: 9071.3
Scaled residuals:
Min 1Q Median 3Q Max
-2.1383 -0.5533 -0.2182 0.3173 6.9530
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 8.249e+01 9.082e+00
trial (Intercept) 9.163e+00 3.027e+00
story (Intercept) 8.910e+00 2.985e+00
repetition (Intercept) 4.824e-12 2.196e-06
Residual 6.008e+02 2.451e+01
Number of obs: 977, groups: participant, 64; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 32.0506 3.5778 87.7215 8.958 5.13e-14 ***
directionreversed 5.0340 3.4534 377.1599 1.458 0.145755
languageSE -2.8003 4.3013 105.2154 -0.651 0.516442
agegroupbaby 16.1880 4.5566 103.8341 3.553 0.000575 ***
directionreversed:languageSE -0.0397 4.2981 899.2969 -0.009 0.992633
directionreversed:agegroupbaby -6.3681 4.5293 899.4382 -1.406 0.160075
languageSE:agegroupbaby -3.5466 6.4723 104.2379 -0.548 0.584888
directionreversed:languageSE:agegroupbaby 1.6496 6.4286 900.0159 0.257 0.797550
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) drctnr lnggSE aggrpb drc:SE drctn: lngSE:
dirctnrvrsd -0.485
languageSE -0.705 0.366
agegroupbby -0.663 0.341 0.552
drctnrvr:SE 0.354 -0.730 -0.496 -0.275
drctnrvrsd: 0.331 -0.682 -0.275 -0.491 0.548
lnggSE:ggrp 0.469 -0.244 -0.665 -0.705 0.330 0.347
drctnrv:SE: -0.237 0.488 0.332 0.348 -0.668 -0.708 -0.493
sd_y_all <- lmer(sd_y*2 ~ direction * language * agegroup + (1|participant) + (1|story) + (1|repetition) + (1|trial), data = sd)
summary(sd_y_all)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: sd_y * 2 ~ direction * language * agegroup + (1 | participant) +
(1 | story) + (1 | repetition) + (1 | trial)
Data: sd
REML criterion at convergence: 10753.1
Scaled residuals:
Min 1Q Median 3Q Max
-2.3003 -0.6570 -0.1713 0.4813 5.5326
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 726.90 26.961
trial (Intercept) 3.35 1.830
story (Intercept) 71.54 8.458
repetition (Intercept) 0.00 0.000
Residual 3359.64 57.962
Number of obs: 977, groups: participant, 64; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 103.844 9.362 83.443 11.092 <2e-16 ***
directionreversed 10.616 7.814 353.900 1.359 0.1752
languageSE -25.070 11.570 91.671 -2.167 0.0329 *
agegroupbaby 26.766 12.265 90.663 2.182 0.0317 *
directionreversed:languageSE -5.098 10.174 899.669 -0.501 0.6164
directionreversed:agegroupbaby -7.518 10.714 897.935 -0.702 0.4830
languageSE:agegroupbaby -3.636 17.419 90.947 -0.209 0.8351
directionreversed:languageSE:agegroupbaby 10.923 15.208 898.463 0.718 0.4728
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) drctnr lnggSE aggrpb drc:SE drctn: lngSE:
dirctnrvrsd -0.420
languageSE -0.724 0.338
agegroupbby -0.681 0.314 0.551
drctnrvr:SE 0.321 -0.764 -0.437 -0.242
drctnrvrsd: 0.299 -0.713 -0.243 -0.432 0.549
lnggSE:ggrp 0.481 -0.225 -0.664 -0.705 0.290 0.305
drctnrv:SE: -0.214 0.510 0.292 0.306 -0.668 -0.708 -0.434
So there is no effect of language or direction (or interactions) on the standard deviation. Among babies, there seems to be an effect of age on y-axis viewing space such that it narrows the older the baby is (p = 0.046).
When we put babies and children together, we see age group differences for sd_x and sd_y, not surprisingly.
Now let’s get the middle 50% (aka the IQR) of x and y for each participant’s story (we’ve already trimmed the first 60 samples). That should also take care of further weird outliers. And we are defining “viewing space” as the IQR of the x and y axis.
iqr <- data %>%
group_by(participant,trial) %>%
summarise(xIQR = IQR(x,na.rm=TRUE),
yIQR = IQR(y,na.rm=TRUE),
xmed = median(x, na.rm=TRUE),
ymed = median(y, na.rm=TRUE)) %>%
ungroup()
head(iqr,10)# Join participant info back into IQR table
participantinfo <- data %>%
select(participant, trial, age, group, gender, language, story, direction, mark, repetition) %>%
distinct()
iqr <- left_join(iqr, participantinfo, by = c("participant","trial"))And check out the histograms. I truncated the y-axis at 100 to better see outliers.
iqr %>%
gather(axis,iqr,xIQR:yIQR) %>%
ggplot(aes(x=iqr,fill=axis)) + geom_histogram() + facet_grid(axis~.) +
coord_cartesian(ylim = c(0,50))So we see some outliers. Who are those? Let’s get a table of them. Review those AFTER I’ve done the cleanups of course.
xoutliers <- iqr %>%
arrange(desc(xIQR)) %>%
slice(1:10)
youtliers <- iqr %>%
arrange(desc(yIQR)) %>%
slice(1:10)Next, check the medians.
iqr %>%
gather(axis,med,xmed:ymed) %>%
ggplot(aes(x=med,fill=axis)) + geom_histogram() + facet_grid(axis~.) +
coord_cartesian(ylim = c(0,50))SO I need to review those too. After cleaning up/removing kids.
iqr.gather <- iqr %>% gather(axis,value,xIQR:ymed)
iqr.iqr <- filter(iqr.gather,axis=="xIQR" | axis=="yIQR")
iqr.med <- filter(iqr.gather,axis=="xmed" | axis=="ymed")
ggplot(iqr.iqr,aes(x=language,y=value,fill=direction)) +
geom_boxplot() + theme(axis.text.x=element_text(angle=45,hjust=1)) +
facet_grid(.~axis)And the median x and y position (this assumes all calibrations are correct):
ggplot(iqr.med,aes(x=language,y=value,fill=direction)) +
geom_boxplot() + theme(axis.text.x=element_text(angle=45,hjust=1)) +
facet_grid(.~axis)First, does reversal and language experience have an effect on X IQR? We have random intercepts for each participant and media, and a random slope adjustment for reversed for each participant.
xiqr.reversal <- lmer(xIQR ~ direction * language + (direction|participant) + (1|story), data = iqr)unable to evaluate scaled gradientModel failed to converge: degenerate Hessian with 1 negative eigenvaluesModel failed to converge with 1 negative eigenvalue: -9.7e+01
summary(xiqr.reversal)$coefficients Estimate Std. Error df t value Pr(>|t|)
(Intercept) 38.655208 1.886879 910.8478 20.4863225 5.216794e-77
directionreversed 3.314929 3.083010 102.9577 1.0752249 2.847874e-01
languageSE -7.206389 2.631423 910.8478 -2.7385905 6.290747e-03
directionreversed:languageSE -1.948684 4.296866 103.2962 -0.4535129 6.511308e-01
That’s fine, we’re not exactly predicting changes along the x-axis. The y-axis is what we are really interested in! :)
yiqr.reversal <- lmer(yIQR ~ direction * language + (direction|participant) + (1|story), data = iqr)
summary(yiqr.reversal)$coefficients Estimate Std. Error df t value Pr(>|t|)
(Intercept) 63.299780 5.031539 60.19619 12.580599 1.691476e-18
directionreversed 9.214504 4.874672 287.80078 1.890282 5.972484e-02
languageSE -17.653298 6.825019 69.32015 -2.586557 1.179612e-02
directionreversed:languageSE -8.795846 6.764184 291.58582 -1.300356 1.945059e-01
I want to learn how to make rectangle plots so here we go. Using each participant’s four x and y medians and 4 x and y IQRs (one set for each story, for 4 stories). So I can get the logic and code down. Let’s assume all calibrations were correct. Here’s the chart for the whole media size of 1440x1080 (as reported in Tobii).
# In this order, we'll get a grand median by taking a participant's median across their 4 stories, than the median for forward and reverse across all participants.
medians <- iqr %>%
group_by(participant,direction) %>%
summarise(xIQR = median(xIQR,na.rm=TRUE),
yIQR = median(yIQR,na.rm=TRUE),
xmed = median(xmed,na.rm=TRUE),
ymed = median(ymed,na.rm=TRUE)) %>%
group_by(direction) %>%
summarise(xIQR = median(xIQR,na.rm=TRUE),
yIQR = median(yIQR,na.rm=TRUE),
x = median(xmed,na.rm=TRUE),
y = median(ymed,na.rm=TRUE))
medians <- medians %>%
mutate(y = y*-1,
xmin = x-(xIQR/2),
xmax = x+(xIQR/2),
ymin = y-(yIQR/2),
ymax = y+(yIQR/2))
img <- readPNG("cindy.png")
g <- rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc"))
ggplot(medians, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_minimal() + xlim(0,1440) + ylim(-1080,0)# ggplot(iqr.global, aes(fill=direction,color=direction)) +
# annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
# geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
# theme_minimal() + xlim(0,1440) + ylim(-1080,0) +
# geom_hline(yintercept=-1080+885) +
# geom_hline(yintercept=-1080+525) +
# annotate(geom="text", x = 300, y = -1080+555, label = "upper shoulder point") +
# annotate(geom="point", x = 535, y = -1080+525) +
# annotate(geom="text", x = 535, y = -1080+910, label = "height line") +
# annotate(geom="rect", xmin = 535, xmax = 535+365, ymin = -525-551, ymax = -1080+525, fill="maroon", color="black", alpha=0.5) +
# annotate(geom="text", x = 700, y = -900, label = "torso")Now let’s see the variation in viewing spaces for all our individuals. Should be fun.
iqr.individuals <- iqr %>%
rename(x = xmed,
y = ymed) %>%
mutate(y = y*-1,
xmin = x-(xIQR/2),
xmax = x+(xIQR/2),
ymin = y-(yIQR/2),
ymax = y+(yIQR/2))
ggplot(iqr.individuals, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
ggtitle("with IQRs")Now let’s make Outer Limits charts which is IQR +/- 2 SDs. But I want to change that because I don’t like the idea of mixing IQRs and SDs.
# sd.individuals <- select(sd.individuals,participant,media,xsd,ysd)
# iqrsd.individuals <- left_join(iqr.individuals,sd.individuals,by=c("participant","media")) %>%
# mutate(xmin = xmin-(2*xsd),
# xmax = xmax+(2*xsd),
# ymin = ymin-(2*ysd),
# ymax = ymax+(2*ysd))
#
# ggplot(iqrsd.individuals, aes(fill=direction,color=direction)) +
# annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
# geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
# theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
# ggtitle("with SDs")# iqrsd.individuals <- iqrsd.individuals %>%
# group_by(direction) %>%
# dplyr::summarize(x = mean(x,na.rm=TRUE),
# y = mean(y,na.rm=TRUE),
# xmin = mean(xmin,na.rm=TRUE),
# ymin = mean(ymin,na.rm=TRUE),
# xmax = mean(xmax,na.rm=TRUE),
# ymax = mean(ymax,na.rm=TRUE))
# ggplot(iqrsd.individuals, aes(fill=direction,color=direction)) +
# annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
# geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
# theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
# ggtitle("Average of above chart (rain's outer limits)")Just read this good post about plotting within-subject variation and saw it can really apply to this dataset. So I’m going to try out one example.
We know the y means aren’t significantly different across groups but that’s been hard to visualize on a per-subject basis (or even from an error bar chart since the error bars are so small). Let’s visualize that!
# First get the mean of each trial, THEN the participant-level means
within_subjects <- data %>%
group_by(participant, agegroup, language, direction, trial) %>%
summarise(y = mean(y, na.rm = TRUE),
count = n()) %>%
group_by(participant, agegroup, language, direction) %>%
summarise(mean = mean(y, na.rm = TRUE),
se = sd(y, na.rm = TRUE)/sqrt(n()),
count = n())
# Then spread out mean and SE columns by direction
within_subjects_means <- within_subjects %>%
select(-se, -count) %>%
spread(direction, mean, sep = "_")
within_subjects_se <- within_subjects %>%
select(-mean, -count) %>%
spread(direction, se, sep = "SE")
within_subjects <- left_join(within_subjects_means, within_subjects_se)Joining, by = c("participant", "agegroup", "language")
# Now let's plot just the means
lims <- c(min(data$y, na.rm = T)+100, max(data$y, na.rm = T)-200)
within_subjects %>%
ggplot(aes(x = direction_forward, y = direction_reversed, color = language)) +
geom_point() +
geom_abline() +
theme(aspect.ratio = 1) +
scale_x_continuous("forward", limits = lims) +
scale_y_continuous("reversed", limits = lims) +
ggtitle("Y Axis Means") +
facet_wrap("agegroup")# now with error bars
within_subjects %>%
ggplot(aes(x = direction_forward, y = direction_reversed, color = language)) +
geom_point(size = 2) +
geom_errorbar(aes(ymin=direction_reversed-directionSEreversed, ymax=direction_reversed+directionSEreversed)) +
geom_errorbarh(aes(xmin=direction_forward-directionSEforward, xmax=direction_forward+directionSEforward)) +
geom_abline() +
theme(aspect.ratio = 1) +
scale_x_continuous("forward", limits = c(140,460)) +
scale_y_continuous("reversed", limits = c(140,460)) +
ggtitle("Y Axis Means") +
facet_wrap("agegroup")